<?php

// Raise the execution time limit to 100 seconds; the script performs many sequential HTTP requests.
ini_set('max_execution_time','100');
// Declare the output as UTF-8 HTML so the Chinese status messages echoed by this script render correctly.
header('Content-type:text/html; charset=utf-8');
/**
 * Collect long sentences from the Sina rolling-news feed.
 *
 * Downloads the roll API listing, fetches every article URL found in it and
 * appends each <p> paragraph of the article body whose tag-stripped length
 * exceeds 60 characters to data/juzi/<ymdHi>.txt next to this script.
 * Paragraphs containing "原标题" are skipped.
 *
 * A failed download or an article whose body cannot be located is skipped;
 * it never aborts the whole run.
 *
 * @return array List of article titles (the contents of <h1 class="main-title">).
 */
function sina()
{
    $titles = array();

    $feed = file_get_contents('http://feed.mix.sina.com.cn/api/roll/get?pageid=153&lid=2510&k=&num=300&page=1');
    if ($feed === false) {
        // Feed unreachable: nothing to crawl.
        return $titles;
    }

    if (!preg_match_all("/\"url\":\"(.*?)\",/", $feed, $urls)) {
        // No article URLs in the response (or the regex failed).
        return $titles;
    }

    // Loop-invariant output directory (backslashes normalised for Windows paths).
    $juzi_dir = str_replace('\\', '/', __DIR__) . '/data/juzi/';

    // Pattern that removes leading ideographic spaces as whole characters.
    // A byte-wise ltrim() with this multibyte character as its char list can
    // cut a following multibyte character in half.
    $indent_pattern = '/^(?:' . preg_quote("　", '/') . ')+/u';

    foreach ($urls[1] as $url) {
        // Articles are fetched over plain http; every "https" occurrence is rewritten.
        $wz_url = str_replace('https', 'http', $url);
        // Drop the backslashes that escape the slashes in the feed's URLs.
        $wz_url = str_replace('\\', '', $wz_url);

        $html = file_get_contents($wz_url);
        if ($html === false) {
            continue;
        }

        if (preg_match("/<h1 class=\"main-title\">(.*?)<\/h1>/", $html, $title) && !empty($title[1])) {
            $titles[] = $title[1];
        }

        // Article body: everything from the article container up to the author line.
        if (!preg_match("/<div class=\"article\" id=\"article\">.*?<p class=\"show_author\">/s", $html, $contents)) {
            continue;
        }

        if (!preg_match_all("/<p>(.*?)<\/p>/", $contents[0], $juzis)) {
            continue;
        }

        foreach ($juzis[1] as $juzi) {
            if (strpos($juzi, '原标题') !== false) {
                continue;
            }

            $wz_juzi = preg_replace($indent_pattern, '', $juzi);
            if ($wz_juzi === null) {
                // Invalid UTF-8 in the paragraph: measure it without trimming.
                $wz_juzi = $juzi;
            }
            $wz_juzi = preg_replace("/<.*?>/", '', $wz_juzi);

            if (mb_strlen((string) $wz_juzi, 'UTF-8') > 60) {
                // NOTE(review): the raw paragraph is stored; the cleaned copy is only
                // used for the length check — confirm this is intended.
                file_put_contents($juzi_dir . date("ymdHi") . '.txt', $juzi . PHP_EOL, FILE_APPEND | LOCK_EX);
            }
        }
    }

    return $titles;
}

// Run the Sina sentence crawl and keep the article titles it returns.
$titles = sina();

// Report the last collected title. When no title was collected nothing is
// printed (the "unable to fetch title" message is intentionally disabled).
if (count($titles) > 0) {
    echo end($titles), "句子采集完成", "<br>";
}

// Fetch the Weibo "realtimehot" card list and save the Chinese text of every
// entry, one per line, into a new file under data/title/ and data/reci/.
// NOTE(review): the URL embeds a hard-coded gsid session token; consider
// moving it to configuration and rotating it.
$url = 'https://api.weibo.cn/2/guest/cardlist?gsid=_2A25IkrLcDeRxGeNL7FoS-SbKyj-IHXVRSXgirDV6PUNbkdAKLRHlkWpNSl-tuyWLzqJSx0ENDeOG0I_g_YtJdRTH&containerid=106003%26filter_type%3Drealtimehot%26since_id%3D6856638187310474819&page_type=searchall';
$curl = curl_init();

curl_setopt_array($curl, [
    CURLOPT_URL => $url,
    CURLOPT_RETURNTRANSFER => true,
]);

$response = curl_exec($curl);
curl_close($curl);

// curl_exec() returns false on failure and json_decode() returns null on
// malformed input, so validate the whole path before dereferencing it.
$json = is_string($response) ? json_decode($response) : null;
$data = [];
if (is_object($json) && isset($json->cards[0]->card_group) && is_array($json->cards[0]->card_group)) {
    $data = $json->cards[0]->card_group;
}

$result = [];

foreach ($data as $item) {
    // Prefer itemid, fall back to title_sub; an entry with neither yields an empty line.
    $text = '';
    if (isset($item->itemid)) {
        $text = $item->itemid;
    } elseif (isset($item->title_sub)) {
        $text = $item->title_sub;
    }
    if (!is_string($text)) {
        $text = '';
    }

    // Keep only the CJK ideographs (U+4E00..U+9FA5) and join the runs together.
    $result[] = preg_match_all('/[\x{4e00}-\x{9fa5}]+/u', $text, $matches)
        ? implode('', $matches[0])
        : '';
}

if (!empty($result)) {
    $base_dir = str_replace('\\', '/', __DIR__);
    $content = implode("\n", $result);

    // Each target file gets its own random suffix.
    file_put_contents($base_dir . '/data/title/' . date("Y-m-d") . '_' . mt_rand(1000, 9999) . '.txt', $content, LOCK_EX);
    file_put_contents($base_dir . '/data/reci/' . date("Y-m-d") . '_' . mt_rand(1000, 9999) . '.txt', $content, LOCK_EX);
    echo "采集完成。" . PHP_EOL;
} else {
    // Request failed or the response had no card group: do not write empty files.
    echo '不可采集。' . PHP_EOL;
}

// tophub.today ranking pages to scrape. For each page the text of every
// <td class="al"> cell is saved, one per line, into a new file under
// data/title/ and data/reci/.
$url_list = [
    'https://tophub.today/n/Jb0vmloB1G',
    'https://tophub.today/n/KqndgxeLl9',
    'https://tophub.today/n/rx9oz6oXbq',
    'https://tophub.today/n/mproPpoq6O',
    'https://tophub.today/n/wWmoO5Rd4E',
    'https://tophub.today/n/74KvxwokxM',
];

foreach ($url_list as $url) {
    $curl = curl_init();

    curl_setopt_array($curl, [
        CURLOPT_URL => $url,
        CURLOPT_RETURNTRANSFER => true,
        // NOTE(review): TLS peer verification is disabled here; enable it if a CA bundle is available.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_USERAGENT => 'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)'
    ]);

    $response = curl_exec($curl);
    curl_close($curl);

    // curl_exec() returns false on failure; only parse an actual response body.
    $result = [];
    if (is_string($response) && preg_match_all('/<td class="al">(.+?)<\/td>/', $response, $matches)) {
        $result = $matches[1];
    }

    $texts = array_map('strip_tags', $result);

    if (!empty($texts)) {
        $base_dir = str_replace('\\', '/', __DIR__);
        $content = implode("\n", $texts);

        // Write both targets. Previously the title path was overwritten by the
        // reci path before use, so the data/title/ file was never created.
        $file_path_title = $base_dir . '/data/title/' . date("Y-m-d") . '_' . mt_rand(1000, 9999) . '.txt';
        $file_path_reci = $base_dir . '/data/reci/' . date("Y-m-d") . '_' . mt_rand(1000, 9999) . '.txt';
        file_put_contents($file_path_title, $content, LOCK_EX);
        file_put_contents($file_path_reci, $content, LOCK_EX);
        echo '已保存' . PHP_EOL;
    } else {
        echo '不可采集。' . PHP_EOL;
    }
}